'''
input.py
'''
from types import NoneType
import os
import re

# Extract the words from a document
def get_words(document):
    """Return every whitespace-separated token of a text file, lowercased.

    Args:
        document: Path of the file to read; it is opened in text mode.

    Returns:
        A list with the lowercased tokens in the order they occur in the
        file. Blank lines contribute nothing.
    """
    with open(document, 'r') as handle:
        return [token.lower() for row in handle for token in row.split()]


# Build the collection of ('path', 'class number') pairs from the training set (TRAIN_DIR)
def read_docs(docs_dir):
    """Collect (file path, class label) pairs from a directory tree.

    The tree rooted at ``docs_dir`` is traversed with ``os.walk``. The label
    for the files of a directory is derived from that directory's path as
    reported by ``os.walk``: the path has to start with word characters
    followed by a forward or backward slash, and the label is the run of
    word characters right after that separator.

    Files in directories whose path does not have that shape (for example
    files placed directly in a single-component ``docs_dir``) are skipped.

    Args:
        docs_dir: Root directory to walk.

    Returns:
        A list of ``(path, label)`` tuples, where ``path`` is the directory
        path joined with the file name and ``label`` is the captured string.
    """
    docs = []
    for dirname, _dirnames, filenames in os.walk(docs_dir):
        # The label depends only on the directory, so match once per
        # directory rather than once per file.
        m = re.match(r"\w+[\\/](\w+)", dirname)
        # re.match returns None when the pattern does not match; test for
        # that explicitly so such directories are skipped instead of
        # failing on m.group().
        if m is None:
            continue
        label = m.group(1)
        docs.extend(
            (os.path.join(dirname, filename), label) for filename in filenames
        )
    return docs
